home *** CD-ROM | disk | FTP | other *** search
Text File | 1989-11-16 | 36.9 KB | 1,353 lines |
- Newsgroups: comp.sources.misc
- subject: v09i005: 16 bit compress for MSDOS
- From: allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc)
- Reply-To: graham@tsmith.UUCP
-
- Posting-number: Volume 9, Issue 5
- Submitted-by: graham@tsmith.UUCP
- Archive-name: compress.ms
-
- Recently, there have been people looking for source for compress.c to run
- under MSDOS. Here is one that may fit the bill.
-
- Doug.
-
- ------------------------------ Cut Here ------------------------------------
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then feed it
- # into a shell via "sh file" or similar. To overwrite existing files,
- # type "sh file -c".
- # The tool that generated this appeared in the comp.sources.unix newsgroup;
- # send mail to comp-sources-unix@uunet.uu.net if you want that tool.
- # If this archive is complete, you will see the following message at the end:
- # "End of shell archive."
- # Contents: README makefile compress.c
- # Wrapped by graham@tsmith on Wed Nov 15 20:52:00 1989
- PATH=/bin:/usr/bin:/usr/ucb ; export PATH
- if test -f 'README' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'README'\"
- else
- echo shar: Extracting \"'README'\" \(609 characters\)
- sed "s/^X//" >'README' <<'END_OF_FILE'
- XHere is a version of compress 4.0 hacked for MSDOS. A makefile is provided
- Xwhich will compile it using Microsoft C, Turbo C, or Zortech C. The makefile
- Xwill need editing if other than the Microsoft compiler is used.
- XThe program requires about 400K to run. It takes the same command line
- Xargs as does the UNIX program of the same name, and should be compatible
- Xin all ways with that program. It will decode a 16 bit compressed file,
- Xand can generate the same. On my machine, it decodes about twice as quickly
- Xas the "u16" decompress program posted earlier to c.s.m.
- X
- XDoug Graham.
- Xuunet!mitel!sce!tsmith!graham
- END_OF_FILE
- if test 609 -ne `wc -c <'README'`; then
- echo shar: \"'README'\" unpacked with wrong size!
- fi
- # end of 'README'
- fi
- if test -f 'makefile' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'makefile'\"
- else
- echo shar: Extracting \"'makefile'\" \(1083 characters\)
- sed "s/^X//" >'makefile' <<'END_OF_FILE'
- X#
- X# Makefile for compress.
- X#
- X# If memory usage is a problem under DOS, you may want to do a
- X#
- X# "exemod compress.exe /MAX 0"
- X#
- X# in order to reduce the size of the near heap. If this is done on the
- X# Microsoft executable, memory requirements drop to about 380K from 410K
- X# Depending on how the other compilers manage their near/far heaps, this
- X# should have similar results there as well.
- X#
- X
- XDOSDEFS = -Di8088 -DMSDOS -DPROTO
- X
- X#
- X# Microsoft C 5.0 under MSDOS
- X#
- X# The resulting executable is faster by about 20% than either Turbo C,
- X# or Zortech C.
- X#
- Xcompress.exe: compress.c
- X cl -o compress.exe -W3 -Ox -DMSC $(DOSDEFS) compress.c
- X
- X#
- X# Turbo C 2.0 under MSDOS
- X#
- X# compress.exe: compress.c
- X# tcc -ecompress.exe -Z -O -G -w $(DOSDEFS) compress.c
- X
- X#
- X# Zortech C under MSDOS
- X#
- X# compress.exe: compress.c
- X# ztc -ocompress.exe -o $(DOSDEFS) compress.c
- X
- X#
- X# Sun OS 3.5.
- X# Compression is slightly slower than /usr/ucb/compress probably
- X# because the compiler is doing lots of "extl"'s. Decompression
- X# is slightly faster.
- X#
- X# compress: compress.c
- X# cc -O -DBSD4_2 -o compress compress.c
- END_OF_FILE
- if test 1083 -ne `wc -c <'makefile'`; then
- echo shar: \"'makefile'\" unpacked with wrong size!
- fi
- # end of 'makefile'
- fi
- if test -f 'compress.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'compress.c'\"
- else
- echo shar: Extracting \"'compress.c'\" \(32700 characters\)
- sed "s/^X//" >'compress.c' <<'END_OF_FILE'
- X/*
- X * Compress - data compression program
- X */
- Xstatic char rcs_ident[] = "@(#) compress,v 4.1 (DOS) 89/11/10 02:43:00 doug Release $";
- X
- X/*
- X * compress.c - File compression ala IEEE Computer, June 1984.
- X *
- X * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
- X * Jim McKie (decvax!mcvax!jim)
- X * Steve Davies (decvax!vax135!petsd!peora!srd)
- X * Ken Turkowski (decvax!decwrl!turtlevax!ken)
- X * James A. Woods (decvax!ihnp4!ames!jaw)
- X * Joe Orost (decvax!vax135!petsd!joe)
- X * Doug Graham (uunet!mitel!sce!tsmith!graham)
- X *
- X * Revision 4.1 (DOS) 89/11/10 02:43:00 doug
- X * Ported to MSDOS. Still works elsewhere, but maybe not as quickly.
- X * Removed as much long arithmetic as possible for speed on 16 bit machines.
- X * Use unsigned short's instead. Changed secondary hashing function to limit
- X * hash table size to 64K. This means table indexes can be 16 bit shorts.
- X * This compress will not generate codes from MAXMAXCODE (0xf000) thru
- X * 0xffff. Doesn't appear to hurt compression much. Removed speed hacks for
- X * other machines so I could understand the code. Added some for the i8088.
- X * Send CLEAR immediately when hash table fills instead of waiting for the
- X * compression ratio to drop. This is faster, and in some cases improves
- X * compression (but more often reduces it slightly). Junked the variable
- X * size hash table stuff because I am depending on 16 bit unsigned integer
- X * wrap around for indexing into hash table, so the table must have 2^16
- X * entries. Took out the XENIX_16 stuff. The DOS way ought to work on Xenix
- X * as well, and should be faster, but I don't have access to Xenix in order
- X * to find out. Added some extra error checking on decompression to try to
- X * avoid blowing the machine out of the water when decompressing a corrupt
- X * file. Add "okunlink" to avoid the problem of losing the output file as
- X * well as the input file if ^C is hit at the wrong time. Lots of other
- X * cosmetic changes.
- X *
- X * Revision 4.0 85/07/30 12:50:00 joe
- X * Removed ferror() calls in output routine on every output except first.
- X * Prepared for release to the world.
- X *
- X * Revision 3.6 85/07/04 01:22:21 joe
- X * Remove much wasted storage by overlaying hash table with the tables
- X * used by decompress: tab_suffix[1<<BITS], stack[8000]. Updated USERMEM
- X * computations. Fixed dump_tab() DEBUG routine.
- X *
- X * Revision 3.5 85/06/30 20:47:21 jaw
- X * Change hash function to use exclusive-or. Rip out hash cache. These
- X * speedups render the megamemory version defunct, for now. Make decoder
- X * stack global. Parts of the RCS trunks 2.7, 2.6, and 2.1 no longer apply.
- X *
- X * Revision 3.4 85/06/27 12:00:00 ken
- X * Get rid of all floating-point calculations by doing all compression ratio
- X * calculations in fixed point.
- X *
- X * Revision 3.3 85/06/24 21:53:24 joe
- X * Incorporate portability suggestion for M_XENIX. Got rid of text on #else
- X * and #endif lines. Cleaned up #ifdefs for vax and interdata.
- X *
- X * Revision 3.2 85/06/06 21:53:24 jaw
- X * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list.
- X * Default to "quiet" output (no compression statistics).
- X *
- X * Revision 3.1 85/05/12 18:56:13 jaw
- X * Integrate decompress() stack speedups (from early pointer mods by McKie).
- X * Repair multi-file USERMEM gaffe. Unify 'force' flags to mimic semantics
- X * of SVR2 'pack'. Streamline block-compress table clear logic. Increase
- X * output byte count by magic number size.
- X *
- X * Revision 3.0 84/11/27 11:50:00 petsd!joe
- X * Set HSIZE depending on BITS. Set BITS depending on USERMEM. Unrolled
- X * loops in clear routines. Added "-C" flag for 2.0 compatibility. Used
- X * unsigned compares on Perkin-Elmer. Fixed foreground check.
- X *
- X * Revision 2.7 84/11/16 19:35:39 ames!jaw
- X * Cache common hash codes based on input statistics; this improves
- X * performance for low-density raster images. Pass on #ifdef bundle
- X * from Turkowski.
- X *
- X * Revision 2.6 84/11/05 19:18:21 ames!jaw
- X * Vary size of hash tables to reduce time for small files.
- X * Tune PDP-11 hash function.
- X *
- X * Revision 2.5 84/10/30 20:15:14 ames!jaw
- X * Junk chaining; replace with the simpler (and, on the VAX, faster)
- X * double hashing, discussed within. Make block compression standard.
- X *
- X * Revision 2.4 84/10/16 11:11:11 ames!jaw
- X * Introduce adaptive reset for block compression, to boost the rate
- X * another several percent. (See mailing list notes.)
- X *
- X * Revision 2.3 84/09/22 22:00:00 petsd!joe
- X * Implemented "-B" block compress. Implemented REVERSE sorting of tab_next.
- X * Bug fix for last bits. Changed fwrite to putchar loop everywhere.
- X *
- X * Revision 2.2 84/09/18 14:12:21 ames!jaw
- X * Fold in news changes, small machine typedef from thomas,
- X * #ifdef interdata from joe.
- X *
- X * Revision 2.1 84/09/10 12:34:56 ames!jaw
- X * Configured fast table lookup for 32-bit machines.
- X * This cuts user time in half for b <= FBITS, and is useful for news batching
- X * from VAX to PDP sites. Also sped up decompress() [fwrite->putc] and
- X * added signal catcher [plus beef in writeerr()] to delete effluvia.
- X *
- X * Revision 2.0 84/08/28 22:00:00 petsd!joe
- X * Add check for foreground before prompting user. Insert maxbits into
- X * compressed file. Force file being uncompressed to end with ".Z".
- X * Added "-c" flag and "zcat". Prepared for release.
- X *
- X * Revision 1.10 84/08/24 18:28:00 turtlevax!ken
- X * Will only compress regular files (no directories), added a magic number
- X * header (plus an undocumented -n flag to handle old files without headers),
- X * added -f flag to force overwriting of possibly existing destination file,
- X * otherwise the user is prompted for a response. Will tack on a .Z to a
- X * filename if it doesn't have one when decompressing. Will only replace
- X * file if it was compressed.
- X *
- X * Revision 1.9 84/08/16 17:28:00 turtlevax!ken
- X * Removed scanargs(), getopt(), added .Z extension and unlimited number of
- X * filenames to compress. Flags may be clustered (-Ddvb12) or separated
- X * (-D -d -v -b 12), or combination thereof. Modes and other status is
- X * copied with copystat(). -O bug for 4.2 seems to have disappeared with
- X * 1.8.
- X *
- X * Revision 1.8 84/08/09 23:15:00 joe
- X * Made it compatible with vax version, installed jim's fixes/enhancements
- X *
- X * Revision 1.6 84/08/01 22:08:00 joe
- X * Sped up algorithm significantly by sorting the compress chain.
- X *
- X * Revision 1.5 84/07/13 13:11:00 srd
- X * Added C version of vax asm routines. Changed structure to arrays to
- X * save much memory. Do unsigned compares where possible (faster on
- X * Perkin-Elmer)
- X *
- X * Revision 1.4 84/07/05 03:11:11 thomas
- X * Clean up the code a little and lint it. (Lint complains about all
- X * the regs used in the asm, but I'm not going to "fix" this.)
- X *
- X * Revision 1.3 84/07/05 02:06:54 thomas
- X * Minor fixes.
- X *
- X * Revision 1.2 84/07/05 00:27:27 thomas
- X * Add variable bit length output.
- X *
- X */
- X
- X#include <stdio.h>
- X#include <ctype.h>
- X#include <signal.h>
- X#include <sys/types.h>
- X#include <sys/stat.h>
- X#ifndef __ZTC__
- X#include <malloc.h>
- X#endif
- X#ifndef BSD4_2
- X#include <stdlib.h>
- X#include <io.h>
- X#endif
- X#include <string.h>
- X#include <fcntl.h>
- X#ifdef MSDOS
- X#include <dos.h>
- X#endif
- X
- X#ifdef PROTO
- X/*
- X * Zortech appears to be missing this prototype, and MSC uses some
- X * silly structure as the second arg. Turbo C doesn't support this
- X * call at all.
- X */
- Xextern int utime(char *path, time_t times[]);
- X#endif
- X
- X#define BITS 16 /* max number of bits/code */
- X#define INIT_BITS 9 /* initial number of bits/code */
- X
- X#define MAXCODE(n_bits) ((code_t)((1L << (n_bits)) - 1))
- X
- X/*
- X * Magic numbers which should appear at the beginning of a compressed file.
- X */
- X#define MAGIC0 0x1f
- X#define MAGIC1 0x9d
- X
- X/*
- X * Defines for third byte of header
- X */
- X#define BIT_MASK 0x1f
- X#define BLOCK_MASK 0x80
- X
- X#if 0
- X#define CHECK_GAP 10000 /* ratio check interval */
- X#endif
- X
- X/*
- X * the next two codes should not be changed lightly, as they must not
- X * lie within the contiguous general code space.
- X */
- X#define FIRST 257 /* first free entry */
- X#define CLEAR 256 /* table clear output code */
- X
- X#define DE_STACKLEN 8192 /* Size of decoder stack */
- X
- X#define HSIZE (1L << 16) /* Size of the hash table. Don't change this */
- X
- Xtypedef unsigned char uchar;
- Xtypedef unsigned long ulong;
- Xtypedef unsigned short code_t;
- Xtypedef unsigned short hash_t;
- X
- X#ifdef PROTO
- X#define ARGS(x) x
- X#else
- X#define ARGS(x) ()
- X#endif
- X
- Xvoid main ARGS((int argc, char **argv));
- Xvoid Usage ARGS((void));
- Xvoid version ARGS((void));
- Xvoid compress ARGS((void));
- Xvoid decompress ARGS((void));
- Xvoid copystat ARGS((void));
- Xvoid writeerr ARGS((void));
- Xvoid cl_hash ARGS((void));
- Xvoid putcode ARGS((code_t code));
- Xvoid prratio ARGS((long num, long den));
- Xint ofopen ARGS((char *filename));
- Xint ifopen ARGS((char *filename));
- Xint check_magic ARGS((void));
- Xint need_clear ARGS((void));
- Xvoid onintr ARGS(());
- Xvoid oops ARGS(());
- Xint taballoc ARGS((void));
- Xvoid clearhash ARGS((void));
- X
- X/*
- X * block compression parameters -- after all codes are used up,
- X * and compression rate changes, start over.
- X */
- Xint block_compress = BLOCK_MASK;
- X
- Xint maxbits = BITS; /* user settable max # bits/code */
- Xint magic = 1; /* 3-byte magic number header */
- Xint zcat_flg = 0; /* Output on stdout */
- Xint verbose = 0; /* don't tell me about compression */
- Xint force = 0; /* Force overwrite of output file */
- Xint do_decomp = 0; /* Decompress rather than compress. */
- Xchar ofname[100]; /* Output file name */
- Xint foreground; /* Running in foreground? */
- Xint exit_stat = 0; /* Exit status */
- Xuchar bitbuf[BITS+2]; /* For (dis)assembling code bytes */
- Xint okunlink; /* OK for sig handler to unlink output file */
- Xchar *ifname;
- X
- X#ifdef i8088
- X
- Xuchar *de_stack;
- Xuchar far *charptr1;
- Xuchar far *codeptrs1[2];
- Xuchar far *codeptrs2[2];
- X
- X/*
- X * Table accessors for the 8088 build.
- X *
- X * The character table is a single far array indexed directly.  Each
- X * code table is split across the two far bases held in codeptrsN[0]
- X * and codeptrsN[1]: an even index i is fetched from base [0] at byte
- X * offset i, an odd index from base [1] at byte offset i-1, so the byte
- X * offset always fits in 16 bits.  The argument is parenthesized so that
- X * an expression argument binds correctly against & and ~.
- X */
- X#define de_suffixof(i) charptr1[i]
- X#define de_prefixof(i) (*(code_t far *)&codeptrs1[(i)&1][(i)&~1])
- X
- X#define en_hashchar(i) charptr1[i]
- X#define en_hashent(i) (*(code_t far *)&codeptrs1[(i)&1][(i)&~1])
- X#define en_hashcode(i) (*(code_t far *)&codeptrs2[(i)&1][(i)&~1])
- X
- X#ifndef MK_FP
- X#define MK_FP(seg, ofs) \
- X ((void far *)(((ulong)(seg) << 16) | (unsigned)(ofs)))
- X#endif
- X
- X#define PARA 16 /* Size of a paragraph */
- X
- X/*
- X * Return a segment address which is the segment part of the normalized
- X * version of "fp" rounded upwards.
- X * I use this on the far pointers returned by "farmalloc". While
- X * they are probably already normalized, I have never seen this
- X * stated anywhere in the doc's.
- X *
- X * There is a lot of junk below which would be unnecessary if only
- X * there were a reasonably compiler independent way of allocating
- X * a given number of PARAGRAPHS (like TC's allocmem). I can't find
- X * one though.
- X */
- X#define FP_SEGCEIL(fp) \
- X (FP_SEG(fp) + (FP_OFF(fp) + PARA - 1)/PARA)
- X
- X/*
- X * Allocate space for the tables used in {en,de}coding. These tables
- X * reside in the far heap. It may seem inefficient to be using far pointers
- X * for the base of these tables, because the offset portion will always be zero.
- X * We could just keep the segment address of the base, and then do something
- X * like:
- X * *MK_FP(baseseg, offset) = blahblah;
- X *
- X * whenever we need to access the table. This SHOULD be more efficient,
- X * but the compilers do not appear to generate very efficient code in this
- X * case. Huge pointers are not used, because they are slow, and because
- X * Zortech does not support them.
- X */
- X
- X#ifdef MSC
- X#define farmalloc(n) halloc(n, 1)
- X#endif
- X
- X/*
- X * Allocate the work tables for the selected mode.
- X *
- X * Decompression needs the near-heap decoder stack; compression needs the
- X * second code table instead.  Both modes then need the character table
- X * and the first code table.  Every far block is over-allocated by PARA
- X * elements so that its start can be rounded up to a paragraph boundary
- X * and addressed with a zero offset.
- X *
- X * Returns 1 on success, 0 as soon as any allocation fails.
- X */
- Xint taballoc()
- X{
- X    char far *raw;      /* block exactly as the allocator returned it */
- X
- X    if (!do_decomp) {
- X        raw = farmalloc((HSIZE + PARA) * sizeof(code_t));
- X        if (raw == 0)
- X            return (0);
- X        codeptrs2[0] = MK_FP(FP_SEGCEIL(raw), 0);
- X        codeptrs2[1] = MK_FP(FP_SEGCEIL(raw) + HSIZE/PARA, 0);
- X    }
- X    else {
- X        de_stack = malloc(DE_STACKLEN);
- X        if (de_stack == 0)
- X            return (0);
- X    }
- X
- X    raw = farmalloc((HSIZE + PARA) * sizeof(char));
- X    if (raw == 0)
- X        return (0);
- X    charptr1 = MK_FP(FP_SEGCEIL(raw), 0);
- X
- X    raw = farmalloc((HSIZE + PARA) * sizeof(code_t));
- X    if (raw == 0)
- X        return (0);
- X    codeptrs1[0] = MK_FP(FP_SEGCEIL(raw), 0);
- X    codeptrs1[1] = MK_FP(FP_SEGCEIL(raw) + HSIZE/PARA, 0);
- X
- X    return (1);
- X}
- X
- X#else
- X
- Xuchar chartab1[HSIZE];
- Xcode_t codetab1[HSIZE];
- Xcode_t codetab2[HSIZE];
- X
- X#define de_suffixof(i) chartab1[i]
- X#define de_prefixof(i) codetab1[i]
- X#define de_stack (uchar *)codetab2
- X
- X#define en_hashchar(i) chartab1[i]
- X#define en_hashent(i) codetab1[i]
- X#define en_hashcode(i) codetab2[i]
- X
- X#endif
- X
- X/*
- X * Print the command-line synopsis and the meaning of each flag on stderr.
- X * Only the "-b" line needs formatting (it reports the compiled-in BITS),
- X * so the fixed lines on either side of it are kept in tables.
- X */
- Xvoid Usage()
- X{
- X    static char *before_b[] = {
- X        "Usage: compress [-dfvcVnC] [-b maxbits] [file ...]\n",
- X        " -V => print Version\n",
- X        " -d => decompress\n",
- X        " -v => verbose\n",
- X        " -f => force overwrite of output file\n",
- X        " -n => no header: useful to uncompress old files\n",
- X        0
- X    };
- X    static char *after_b[] = {
- X        " -c => cat all output to stdout\n",
- X        " -C => generate output compatible with compress 2.0.\n",
- X        0
- X    };
- X    char **line;
- X
- X    for (line = before_b; *line != 0; line++)
- X        fputs(*line, stderr);
- X
- X    fprintf(stderr, " -b maxbits => maxbits. Default %d\n", BITS);
- X
- X    for (line = after_b; *line != 0; line++)
- X        fputs(*line, stderr);
- X}
- X
- X/*****************************************************************
- X * TAG( main )
- X *
- X * Algorithm from "A Technique for High Performance Data Compression",
- X * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
- X *
- X * Usage: compress [-dfvc] [-b bits] [file ...]
- X * Inputs:
- X * -d: If given, decompression is done instead.
- X *
- X * -c: Write output on stdout, don't remove original.
- X *
- X * -b: Parameter limits the max number of bits/code.
- X *
- X * -f: Forces output file to be generated, even if one already
- X * exists, and even if no space is saved by compressing.
- X * If -f is not used, the user will be prompted if stdin is
- X * a tty, otherwise, the output file will not be overwritten.
- X *
- X * -v: Write compression statistics
- X *
- X * file ...: Files to be compressed. If none specified, stdin
- X * is used.
- X * Outputs:
- X * file.Z: Compressed form of file with same mode, owner, and utimes
- X * or stdout (if stdin used as input)
- X *
- X * Assumptions:
- X * When filenames are given, replaces with the compressed version
- X * (.Z suffix) only if the file decreases in size.
- X * Algorithm:
- X * Modified Lempel-Ziv method (LZW). Basically finds common
- X * substrings and replaces them with a variable size code. This is
- X * deterministic, and can be done on the fly. Thus, the decompression
- X * procedure needs no input table, but tracks the way the table was built.
- X */
- X
- X#ifdef __ZTC__
- X#include <int.h>
- X/*
- X * Zortech-only handler that main() installs on interrupt 0x23 with
- X * int_intercept(); it turns the interrupt into an ordinary SIGINT.
- X * The argument is unused.
- X * NOTE(review): the meaning of the return value 1 is defined by
- X * Zortech's int_intercept() -- confirm against its documentation.
- X */
- Xint silly_nonsense(struct INT_DATA *foo)
- X{
- X    raise(SIGINT);
- X    return 1;
- X}
- X#endif
- X
- X#define ARGVAL() (*++(*argv) || (--argc && *++argv))
- X
- X/*
- X * Program entry point.
- X *
- X * Installs the signal handlers, picks the default mode from the name the
- X * program was invoked under ("uncompress", "zcat"), parses the flags and
- X * then processes either stdin/stdout (no file arguments) or each named
- X * file in turn.  The process exits with "exit_stat", which is reset for
- X * every file, so it reflects the last file handled.
- X *
- X * File names are copied into the fixed-size "tempname" and "ofname"
- X * buffers.  A name that would not fit (including a ".Z" that has to be
- X * added, and the terminating NUL) is reported and skipped rather than
- X * being allowed to overrun the buffer.
- X */
- Xvoid main(argc, argv)
- Xint argc;
- Xchar **argv;
- X{
- X    char tempname[100], *cp;
- X
- X    if (signal(SIGINT, SIG_IGN) != SIG_IGN) {
- X        signal(SIGINT, onintr);
- X#ifdef __ZTC__
- X        /*
- X         * The "signal" call above isn't good enough for Zortech
- X         */
- X        int_intercept(0x23, silly_nonsense, 256);
- X#endif
- X#ifdef SIGSEGV
- X        signal(SIGSEGV, oops);
- X#endif
- X        if (isatty(2))
- X            foreground = 1;
- X    }
- X
- X    /*
- X     * Reduce argv[0] to its last path component.  Under DOS both kinds
- X     * of slash separate components and the name is folded to lower case.
- X     */
- X#ifndef MSDOS
- X    if ((cp = strrchr(argv[0], '/')) != 0)
- X        cp++;
- X    else
- X        cp = argv[0];
- X#else
- X    for (cp = argv[0]; *cp; cp++)
- X        if (*cp == '/' || *cp == '\\')
- X            argv[0] = cp + 1;
- X    cp = strlwr(argv[0]);
- X#endif
- X    /* Limited to 8 char filenames under DOS */
- X    if (strncmp(cp, "uncompress", 8) == 0)
- X        do_decomp = 1;
- X    else if (strncmp(cp, "zcat", 4) == 0) {
- X        do_decomp = 1;
- X        zcat_flg = 1;
- X    }
- X
- X#ifdef BSD4_2
- X    /* 4.2BSD dependent - take it out if not */
- X    setlinebuf(stderr);
- X#endif /* BSD4_2 */
- X
- X    for (argc--, argv++; argc > 0 && **argv == '-'; argc--, argv++) {
- X        while (*++(*argv)) {    /* Process all flags in this arg */
- X            switch (**argv) {
- X            case 'V':
- X                version();
- X                break;
- X            case 'v':
- X                verbose = 1;
- X                break;
- X            case 'd':
- X                do_decomp = 1;
- X                break;
- X            case 'f':
- X            case 'F':
- X                force = 1;
- X                break;
- X            case 'n':
- X                magic = 0;
- X                break;
- X            case 'C':
- X                block_compress = 0;
- X                break;
- X            case 'b':
- X                /*
- X                 * The value is either the rest of this argument
- X                 * ("-b12") or the whole of the next one ("-b 12").
- X                 */
- X                if (!ARGVAL()) {
- X                    fprintf(stderr, "Missing maxbits\n");
- X                    Usage();
- X                    exit(1);
- X                }
- X                maxbits = atoi(*argv);
- X                goto nextarg;
- X            case 'c':
- X                zcat_flg = 1;
- X                break;
- X            case 'q':
- X                verbose = 0;
- X                break;
- X            default:
- X                fprintf(stderr, "Unknown flag: '%c'; ", **argv);
- X                Usage();
- X                exit(1);
- X            }
- X        }
- Xnextarg:;
- X    }
- X
- X#ifdef i8088
- X    if (! taballoc()) {
- X        fprintf(stderr, "compress: out of memory\n");
- X        exit(1);
- X    }
- X#endif
- X    /*
- X     * If no filename args, do standard input.
- X     */
- X    if (argc <= 0) {
- X        if (! ifopen((char *)0) || ! ofopen((char *)0))
- X            exit(1);
- X
- X        ifname = "stdin";
- X
- X        if (do_decomp) {
- X            if (!check_magic())
- X                exit(1);
- X            decompress();
- X        }
- X        else {
- X            compress();
- X            if (verbose)
- X                putc('\n', stderr);
- X        }
- X        exit(exit_stat);
- X    }
- X
- X    while (--argc >= 0) {
- X        char *suf;
- X
- X        ifname = *argv++;
- X        suf = strrchr(ifname, '.');
- X
- X        exit_stat = 0;
- X        okunlink = 0;
- X
- X        if (do_decomp) {        /* DECOMPRESSION */
- X            if (!suf || (strcmp(suf, ".Z") && strcmp(suf, ".z"))) {
- X                /* Room is needed for the name, ".Z" and the NUL. */
- X                if (strlen(ifname) + 3 > sizeof(tempname)) {
- X                    fprintf(stderr,
- X                        "%s: filename too long to tack on .Z\n", ifname);
- X                    exit_stat = 1;
- X                    continue;
- X                }
- X                strcpy(tempname, ifname);
- X                strcat(tempname, ".Z");
- X                ifname = tempname;
- X            }
- X            /* The output name is built in "ofname" from the input name. */
- X            if (!zcat_flg && strlen(ifname) >= sizeof(ofname)) {
- X                fprintf(stderr, "%s: filename too long\n", ifname);
- X                exit_stat = 1;
- X                continue;
- X            }
- X            if (! ifopen(ifname) || !check_magic())
- X                continue;
- X            if (zcat_flg)
- X                ofname[0] = '\0';
- X            else {
- X                /* Output name is the input name minus its ".Z". */
- X                strcpy(ofname, ifname);
- X                ofname[strlen(ifname) - 2] = '\0';
- X            }
- X            if (!ofopen(ofname))
- X                continue;
- X            if (!zcat_flg && verbose)
- X                fprintf(stderr, "%s: ", ifname);
- X            decompress();
- X        }
- X        else {                  /* COMPRESSION */
- X            if (suf && (!strcmp(suf, ".Z") || !strcmp(suf, ".z"))) {
- X                fprintf(stderr, "%s: already has .Z suffix -- no change\n",
- X                    ifname);
- X                continue;
- X            }
- X            /* "ofname" must hold the name, ".Z" and the NUL. */
- X            if (!zcat_flg && strlen(ifname) + 3 > sizeof(ofname)) {
- X                fprintf(stderr,
- X                    "%s: filename too long to tack on .Z\n", ifname);
- X                exit_stat = 1;
- X                continue;
- X            }
- X            if (! ifopen(ifname))
- X                continue;
- X            if (zcat_flg)
- X                ofname[0] = 0;
- X            else {
- X                strcpy(ofname, ifname);
- X#ifndef MSDOS   /* We'll let ofopen do the complaining */
- X#ifndef BSD4_2
- X                if ((cp = strrchr(ofname, '/')) != NULL)
- X                    cp++;
- X                else
- X                    cp = ofname;
- X                if (strlen(cp) > 12) {
- X                    fprintf(stderr,"%s: filename too long to tack on .Z\n",cp);
- X                    continue;
- X                }
- X#endif
- X#endif
- X                strcat(ofname, ".Z");
- X            }
- X            if (! ofopen(ofname))
- X                continue;
- X            if (! zcat_flg && verbose)
- X                fprintf(stderr, "%s: ", ifname);
- X            compress();
- X        }
- X
- X        if (! zcat_flg) {
- X            copystat();
- X            if ((exit_stat == 1) || verbose)
- X                putc('\n', stderr);
- X        }
- X    }
- X    exit(exit_stat);
- X}
- X
- X/*
- X * compress stdin to stdout
- X *
- X * Algorithm: use open addressing double hashing (no chaining) on the
- X * prefix code / next character combination. We do a variant of Knuth's
- X * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
- X * secondary probe. Here, the modular division first probe gives way
- X * to a faster exclusive-or manipulation. Also do block compression with
- X * an adaptive reset, whereby the code table is cleared when the compression
- X * ratio decreases, but after the table fills. The variable-length output
- X * codes are re-sized at this point, and a special CLEAR code is generated
- X * for the decompressor. Late addition: construct the table according to
- X * file size for noticeable speed improvement on small files. Please direct
- X * questions about this implementation to ames!jaw.
- X *
- X * Secondary hash function changed slightly for DOS. Hash table used to be
- X * > 64K. This is slow on a 16 bit machine because it means long arithmetic,
- X * and more complicated addressing of tables in the far address space.
- X * We now restrict the table size to 64K, and, so that the table does
- X * not overfill, restrict the codes that we will generate to MAXMAXCODE.
- X * This causes slightly poorer compression in some cases, but, interestingly
- X * enough, also causes better compression ratios in certain other cases.
- X * Yes, this is all compatible with other compresses.
- X */
- Xstatic long in_count; /* length of input */
- Xstatic long out_count; /* length of compressed output */
- Xstatic long ratio; /* in_count/out_count * 256 */
- Xstatic int n_bits; /* number of bits/code */
- Xstatic int n_bits8; /* bits/code times 8 */
- Xstatic int bitoffset; /* Offset into bitbuf */
- X
- X#define NOENT ((code_t)0xffff)
- X#define MAXMAXCODE ((code_t)0xf000)
- X
- X/*
- X * Mark every hash table slot as empty by storing NOENT in the
- X * en_hashent() table.  This is done as quickly as possible because its
- X * running time dominates for small files; for big files it hardly
- X * matters, since it is not called often.  (This is why the original
- X * had a variable size hash table.)
- X */
- Xvoid clearhash()
- X{
- X#ifdef i8088
- X    code_t far *slot;
- X    register unsigned left;
- X    int half;
- X
- X    /* The table lives in two far blocks of HSIZE/2 entries each. */
- X    for (half = 0; half < 2; half++) {
- X        slot = (code_t far *)codeptrs1[half];
- X        for (left = (unsigned)(HSIZE/2); left > 0; --left)
- X            *slot++ = NOENT;
- X    }
- X#else
- X    /*
- X     * WARNING: a byte fill only works because NOENT == 0xffff
- X     */
- X    memset((char *)codetab1, 0xff, HSIZE*sizeof(code_t));
- X#endif
- X}
- X
- X/*
- X * Compress stdin to stdout.
- X *
- X * Writes the 3-byte header (unless "magic" is off), then runs LZW over
- X * the input.  "ent" is the code for the string matched so far; each new
- X * character "c" is looked up in the hash table as the pair (ent, c).
- X * On a hit the match is extended; on a miss "ent" is emitted, the pair
- X * is entered under the next free code, and matching restarts at "c".
- X * When no free code is left and block compression is on, a CLEAR code
- X * is sent and the table is restarted.
- X *
- X * Empty input produces the header only.  The first getchar() used to be
- X * stored unchecked, so EOF became code 0xffff and was emitted;
- X * decompress() then turned it into a spurious 0xFF byte.
- X *
- X * Sets exit_stat to 2 when the output is not smaller than the input.
- X */
- Xvoid compress()
- X{
- X    register hash_t i;
- X    register code_t ent;
- X    hash_t disp;
- X    int c;
- X    code_t freecode;    /* first unused entry */
- X    code_t maxcode;     /* maximum code, given n_bits */
- X    code_t maxmaxcode;  /* largest code that will ever be assigned */
- X    code_t k;
- X#ifdef CHECK_GAP
- X    long checkpoint = 0;
- X#endif
- X
- X    if (maxbits < INIT_BITS)
- X        maxbits = INIT_BITS;
- X    if (maxbits > BITS)
- X        maxbits = BITS;
- X
- X    if (magic) {
- X        putchar(MAGIC0); putchar(MAGIC1);
- X        putchar(maxbits | block_compress);
- X        if (ferror(stdout))
- X            writeerr();
- X    }
- X
- X    bitbuf[bitoffset = 0] = 0;
- X    out_count = 3;      /* includes 3-byte header mojo */
- X    ratio = 0;
- X    in_count = 1;
- X
- X    n_bits = INIT_BITS;
- X    n_bits8 = INIT_BITS << 3;
- X    maxcode = MAXCODE(INIT_BITS);
- X    maxmaxcode = MAXCODE(maxbits);
- X    if (maxmaxcode > MAXMAXCODE)
- X        maxmaxcode = MAXMAXCODE;
- X
- X    freecode = ((block_compress) ? FIRST : 256);
- X
- X    clearhash();
- X
- X    /*
- X     * Nothing to encode on empty input: skip straight to the flush so
- X     * that only the header is written.
- X     * NOTE(review): in_count keeps its initial value of 1 here, as it
- X     * did before; prratio() is not visible in this chunk, so the
- X     * denominator passed to it is deliberately left non-zero.
- X     */
- X    if ((c = getchar()) == EOF)
- X        goto Flush;
- X    ent = c;
- X
- X    while ((c = getchar()) != EOF) {
- X        in_count++;
- X
- X        i = (hash_t)(c << 8) ^ ent;     /* xor hashing */
- X
- X        if ((k = en_hashent(i)) == ent && en_hashchar(i) == (uchar)c) {
- X            ent = en_hashcode(i);
- X            goto Continue;
- X        }
- X
- X        if (k != NOENT) {
- X            /*
- X             * New secondary hash for 64K table.
- X             * Experiment shows that the shift by 6 works well.
- X             * Beats me why. "disp" must be relatively
- X             * prime to the table size. Since the table size is a
- X             * power of 2, this means "disp" must be odd.
- X             *
- X             * Note that we do not do a range check before doing
- X             * "i -= disp". It is assumed that the hash table size
- X             * (HSIZE) is 64K, and that the type "hash_t" (which
- X             * is unsigned short) is 16 bits. Thus it is impossible
- X             * for "i" to be out of range. On a machine with something
- X             * other than 16 bit shorts, this would have to change.
- X             */
- X            disp = ((hash_t)(c << 6) ^ ent) | 1;
- X            do {
- X                i -= disp;
- X                if ((k = en_hashent(i)) == ent &&
- X                    en_hashchar(i) == (uchar)c) {
- X                    ent = en_hashcode(i);
- X                    goto Continue;
- X                }
- X            } while (k != NOENT);
- X        }
- X
- X        putcode(ent);
- X
- X        if (freecode <= maxmaxcode) {
- X            /*
- X             * Add the new entry.
- X             */
- X            en_hashchar(i) = (uchar)c;
- X            en_hashent(i) = ent;
- X            en_hashcode(i) = freecode;
- X
- X            /*
- X             * If the next entry is going to be too big for the
- X             * code size, then increase it, if possible.  The current
- X             * group of 8 codes is padded out with zero codes first,
- X             * so the wider codes start on a fresh buffer.
- X             */
- X            if (freecode++ > maxcode) {
- X                while (bitoffset)
- X                    putcode(0);
- X                ++n_bits;
- X                n_bits8 += 8;
- X                maxcode = MAXCODE(n_bits);
- X            }
- X        }
- X#ifdef CHECK_GAP
- X        else if (in_count >= checkpoint && block_compress) {
- X            checkpoint = in_count + CHECK_GAP;
- X            if (need_clear()) {
- X#else
- X        else if (block_compress) {
- X            if (1) {
- X#endif
- X                /*
- X                 * Table is full: tell the decoder to start over, pad
- X                 * out the current group, and restart at INIT_BITS.
- X                 */
- X                putcode(CLEAR);
- X                while (bitoffset > 0)
- X                    putcode(0);
- X                clearhash();
- X                freecode = FIRST;
- X                maxcode = MAXCODE(INIT_BITS);
- X                n_bits = INIT_BITS;
- X                n_bits8 = n_bits << 3;
- X            }
- X        }
- X        ent = c;
- XContinue:;
- X    }
- X    /*
- X     * Put out the final code.
- X     */
- X    putcode(ent);
- X
- XFlush:
- X    /*
- X     * At EOF, write the rest of the buffer.
- X     */
- X    if (bitoffset > 0)
- X        fwrite(bitbuf, 1, (bitoffset + 7) / 8, stdout);
- X    out_count += (bitoffset + 7) / 8;
- X    fflush(stdout);
- X    if (ferror(stdout))
- X        writeerr();
- X
- X    /*
- X     * Print out stats on stderr
- X     */
- X    if (! zcat_flg && verbose) {
- X        fprintf(stderr, "Compression: ");
- X        prratio(in_count - out_count, in_count);
- X    }
- X    if (out_count > in_count)   /* exit(2) if no savings */
- X        exit_stat = 2;
- X}
- X
- X/*
- X * Append one code of "n_bits" bits to the output.  Assumes that chars
- X * are 8 bits.  Codes are packed into "bitbuf" starting at bit position
- X * "bitoffset", low-order bits first.  Once eight codes (exactly
- X * "n_bits" bytes) have been assembled they are written to stdout and
- X * the buffer is restarted.
- X */
- Xvoid putcode(code)
- Xcode_t code;
- X{
- X    register uchar *slot;   /* first buffer byte this code touches */
- X    register int shift;     /* bit position inside that byte */
- X    code_t upper;           /* the bits that spill past the first byte */
- X    int k;
- X
- X    slot = bitbuf + (bitoffset >> 3);
- X    shift = bitoffset & 7;
- X    upper = code >> (8 - shift);
- X
- X    /* The first byte is shared with the previous code; the rest are not. */
- X    slot[0] |= (uchar)(code << shift);
- X    slot[1] = (uchar)upper;
- X    slot[2] = (uchar)(upper >> 8);
- X
- X    bitoffset += n_bits;
- X    if (bitoffset != n_bits8)
- X        return;
- X
- X    /* Eight codes collected: flush them as n_bits bytes. */
- X    out_count += n_bits;
- X    for (k = 0; k < n_bits; k++)
- X        putchar(bitbuf[k]);
- X    bitoffset = 0;
- X    bitbuf[0] = 0;
- X}
- X
- X#ifdef CHECK_GAP
- X/*
- X * Compute the current compression ratio (input/output, scaled by 256)
- X * and return non-zero if it has not improved since we last checked.
- X * The best ratio seen is remembered in "ratio" and is reset to zero
- X * whenever a clear is requested.
- X *
- X * Don't use this anymore. Whenever the hash table fills,
- X * we send a CLEAR immediately (if block_compress). This is faster,
- X * and doesn't appear to affect the compression ratio much.
- X */
- Xint need_clear()
- X{
- X    long now;
- X    int worse;
- X
- X    if (in_count <= 0x007fffffL)
- X        now = (in_count << 8) / out_count;
- X    else if ((out_count >> 8) == 0)
- X        now = 0x7fffffffL;      /* Don't divide by zero */
- X    else
- X        now = in_count / (out_count >> 8);  /* shift would overflow */
- X
- X    worse = !(now > ratio);
- X    ratio = worse ? 0 : now;
- X    return (worse);
- X}
- X#endif
- X
- X/*
- X * Decompress stdin to stdout. This code assumes that chars are 8 bits.
- X *
- X * Input is read into "bitbuf" in groups of "n_bits" bytes, i.e. up to
- X * eight codes at a time. Each code is expanded by walking the
- X * prefix/suffix tables back to a literal, pushing the suffix characters
- X * on "de_stack", and then writing them out in reverse. After each code
- X * a new table entry (previous code + first character of this one) is
- X * added, mirroring what the compressor did.
- X *
- X * Returns silently if the input holds no codes at all. Calls
- X * writeerr() if stdout is in error after the first character or after
- X * the final flush, and oops() on a code that cannot be valid.
- X */
- Xvoid decompress()
- X{
- X register uchar *stackp;
- X register code_t code;
- X code_t oldcode, incode;
- X code_t codemask;
- X code_t freecode; /* last entry assigned; ++freecode is the next free one */
- X code_t maxcode; /* once ++freecode exceeds this, n_bits must grow */
- X code_t maxmaxcode;
- X int finchar;
- X int size; /* bit offset in bitbuf at or past which no whole code starts */
- X int bitoff; /* Offset into bitbuf */
- X int n_bits; /* number of bits/code */
- X#ifndef i8088
- X register uchar *bp;
- X#endif
- X
- X n_bits = INIT_BITS;
- X maxcode = MAXCODE(INIT_BITS) - 1;
- X codemask = MAXCODE(INIT_BITS);
- X freecode = ((block_compress) ? FIRST : 256) - 1;
- X maxmaxcode = MAXCODE(maxbits);
- X
- X /*
- X * Read the first code into "oldcode". An empty input is not an
- X * error: there is simply nothing to write.
- X */
- X if ((size = fread(bitbuf, 1, n_bits, stdin)) <= 0)
- X return;
- X size = (size << 3) - (n_bits - 1);
- X oldcode = (bitbuf[0] | (bitbuf[1] << 8)) & codemask;
- X bitoff = n_bits;
- X
- X /*
- X * First code must be 8 bits == char. Write it, and die
- X * if it can't be written.
- X * NOTE(review): the code is not actually checked to be < 256.
- X */
- X putchar(finchar = oldcode);
- X if (ferror(stdout))
- X writeerr();
- X
- X stackp = de_stack;
- X
- X for ( ; ; ) {
- X if (bitoff >= size) {
- X if ((size = fread(bitbuf, 1, n_bits, stdin)) <= 0)
- X break;
- X /* Round size down to integral number of codes */
- X size = (size << 3) - (n_bits - 1);
- X bitoff = 0;
- X }
- X /*
- X * Read the next code into "code". On the 8088,
- X * a slight speedup is possible because it has the right byte
- X * order, and no alignment restrictions.
- X */
- X#ifdef i8088
- X code = ((code_t)(*(long *)&bitbuf[(bitoff >> 3)] >>
- X (bitoff&7))) & codemask;
- X#else
- X bp = &bitbuf[(bitoff >> 3)];
- X code = (code_t)(((bp[0] | (code_t)bp[1] << 8) |
- X (ulong)bp[2] << 16) >> (bitoff & 7)) & codemask;
- X#endif
- X bitoff += n_bits;
- X
- X if ((code == CLEAR) && block_compress) {
- X n_bits = INIT_BITS;
- X maxcode = MAXCODE(INIT_BITS) - 1;
- X codemask = MAXCODE(INIT_BITS);
- X freecode = (FIRST - 1) - 1;
- X size = 0;
- X continue;
- X }
- X incode = code;
- X
- X /*
- X * Special case for KwKwK string: the code refers to the entry
- X * that is about to be created. Its expansion is the previous
- X * string followed by that string's first character. Any other
- X * code beyond the table is reported through oops().
- X */
- X if (code > freecode) {
- X if (code != freecode + 1)
- X oops();
- X *stackp++ = (uchar)finchar;
- X code = oldcode;
- X }
- X
- X /*
- X * Generate output characters in reverse order
- X * NOTE(review): stackp is not checked against the end of
- X * de_stack while pushing.
- X */
- X while (code >= 256) {
- X *stackp++ = de_suffixof(code);
- X code = de_prefixof(code);
- X }
- X
- X /*
- X * And write them out in the forward order. What is left in
- X * "code" is the literal first character of the string.
- X */
- X putchar(finchar = code);
- X for (code = (stackp - de_stack) + 1; --code != 0; )
- X putchar(*--stackp);
- X
- X /*
- X * Generate the new entry. When the next free code no longer
- X * fits in n_bits, widen the codes and set size to 0 so that the
- X * rest of the current group is dropped and a fresh group is read
- X * (compress() pads the group with zero codes at this point).
- X */
- X if (freecode < maxmaxcode) {
- X if (++freecode > maxcode) {
- X if (++n_bits == maxbits)
- X maxcode = maxmaxcode;
- X else
- X maxcode = MAXCODE(n_bits) - 1;
- X size = 0;
- X codemask = MAXCODE(n_bits);
- X }
- X de_prefixof(freecode) = oldcode;
- X de_suffixof(freecode) = (uchar)finchar;
- X }
- X /*
- X * Remember previous code.
- X */
- X oldcode = incode;
- X }
- X fflush(stdout);
- X if (ferror(stdout))
- X writeerr();
- X}
- X
- X/*
- X * Check a compressed file to make sure it has the proper magic number
- X * at the beginning. Also read the third byte to determine "maxbits",
- X * and "block_compress".
- X *
- X * Returns 1 when the header is acceptable, or when "magic" is off and
- X * no header is looked for. Returns 0 after printing a diagnostic when
- X * the magic number is wrong, the header is cut short, or the file was
- X * written with more bits per code than BITS.
- X */
- Xint check_magic()
- X{
- X    int flags;      /* third header byte, or EOF */
- X
- X    if (! magic)
- X        return (1);
- X    if ((getchar() != MAGIC0) || (getchar() != MAGIC1)) {
- X        fprintf(stderr, "%s: not in compressed format\n", ifname);
- X        return (0);
- X    }
- X    /*
- X     * A file that ends right after the magic number has no flags byte.
- X     * Masking EOF would produce a bogus bit count and a misleading
- X     * "compressed with N bits" message, so report the truncated header
- X     * as a format error and leave the globals untouched.
- X     */
- X    if ((flags = getchar()) == EOF) {
- X        fprintf(stderr, "%s: not in compressed format\n", ifname);
- X        return (0);
- X    }
- X    block_compress = flags & BLOCK_MASK;
- X    maxbits = flags & BIT_MASK;     /* set -b from file */
- X    if (maxbits > BITS) {
- X        fprintf(stderr,
- X            "%s: compressed with %d bits, can only handle %d bits\n",
- X            ifname, maxbits, BITS);
- X        return (0);
- X    }
- X    return (1);
- X}
- X
- X/*
- X * Report a failure on the output file, close stdout, remove the
- X * output file and exit with status 1.  Does not return.
- X */
- Xvoid writeerr()
- X{
- X    perror(ofname);     /* first, while errno still describes the failure */
- X    (void) fclose(stdout);
- X    (void) unlink(ofname);
- X    exit(1);
- X}
- X
- X/*
- X * Copy the permissions and file times from the input file to the
- X * output, then remove the input file.
- X *
- X * The output is kept only when it could be closed without error, the
- X * input is a regular file with no other links, and the result is worth
- X * keeping (exit_stat is not 2, or "force" is set).  In every other
- X * case except a failed stat() of the input, the output file is removed
- X * and the input is left alone.  exit_stat is set to 0 on success and
- X * to 1 when the output could not be completed or the input is not a
- X * plain, singly linked file.
- X */
- Xvoid copystat()
- X{
- X    struct stat statbuf;
- X    int mode;
- X    void (* ss)();
- X#ifndef __TURBOC__
- X    time_t timep[2];
- X#else
- X    struct ftime filetime;
- X    int fd;
- X#endif
- X
- X    /*
- X     * fclose() performs the final flush.  If it fails, the output is
- X     * incomplete: report it and fall through to the cleanup at the
- X     * bottom so that the input file is never deleted in its favour.
- X     */
- X    if (fclose(stdout) == EOF) {
- X        perror(ofname);
- X        exit_stat = 1;
- X    }
- X    else if (stat(ifname, &statbuf)) { /* Get stat on input file */
- X        perror(ifname);
- X        return;
- X    }
- X    else if ((statbuf.st_mode & S_IFMT) != S_IFREG) {
- X        if (! verbose)
- X            fprintf(stderr, "%s: ", ifname);
- X        fprintf(stderr, " -- not a regular file: unchanged");
- X        exit_stat = 1;
- X    }
- X    else if (statbuf.st_nlink > 1) {
- X        if (! verbose)
- X            fprintf(stderr, "%s: ", ifname);
- X        /* st_nlink is not an int everywhere; cast it to match %d */
- X        fprintf(stderr, " -- has %d other links: unchanged",
- X            (int)(statbuf.st_nlink - 1));
- X        exit_stat = 1;
- X    }
- X    else if (exit_stat == 2 && !force) { /* No compression: remove file.Z */
- X        if (verbose)
- X            fprintf(stderr, " -- file unchanged");
- X    }
- X    else { /* ***** Successful Compression ***** */
- X        exit_stat = 0;
- X        mode = statbuf.st_mode & 07777;
- X#ifndef __ZTC__
- X        if (chmod(ofname, mode)) /* Copy modes */
- X            perror(ofname);
- X#endif
- X#ifndef MSDOS
- X        chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */
- X#endif
- X#ifndef __TURBOC__
- X        timep[0] = statbuf.st_atime;
- X        timep[1] = statbuf.st_mtime;
- X        utime(ofname, timep);
- X#else
- X        /* Turbo C: copy the timestamp through open file handles */
- X        if ((fd = open(ofname, O_RDONLY)) >= 0) {
- X            if (getftime(fileno(stdin), &filetime) == 0)
- X                setftime(fd, &filetime);
- X            close(fd);
- X        }
- X#endif
- X        fclose(stdin);
- X        ss = signal(SIGINT, SIG_IGN);
- X        okunlink = 0;
- X        /* ^C here would leave both input, and output files around */
- X        if (unlink(ifname)) /* Remove input file */
- X            perror(ifname);
- X        signal(SIGINT, ss);
- X        if (verbose)
- X            fprintf(stderr, " -- replaced with %s", ofname);
- X        return; /* Successful return */
- X    }
- X
- X    /* Unsuccessful return -- one of the tests failed */
- X
- X    if (unlink(ofname))
- X        perror(ofname);
- X}
- X
- X/*
- X * Abandon the run: close stdout, remove the output file if "okunlink"
- X * says that is currently allowed, and exit with status 1.
- X * NOTE(review): presumably installed as a signal handler elsewhere in
- X * the file -- confirm.
- X */
- Xvoid onintr()
- X{
- X    (void) fclose(stdout);
- X    if (okunlink != 0) {
- X        (void) unlink(ofname);
- X    }
- X    exit(1);
- X}
- X
- X/*
- X * Give up on input that cannot be decoded (wild pointer -- assume bad
- X * input).  When decompressing, say that the input file is corrupt;
- X * then close stdout, remove the output file if "okunlink" allows it,
- X * and exit with status 1.
- X */
- Xvoid oops()
- X{
- X    if (do_decomp) {
- X        fprintf (stderr, "uncompress: %s is corrupt.\n", ifname);
- X    }
- X    (void) fclose(stdout);
- X    if (okunlink != 0) {
- X        (void) unlink(ofname);
- X    }
- X    exit(1);
- X}
- X
- X/*
- X * Print num/den on stderr as a percentage with two decimal places,
- X * e.g. "37.50%".  A negative ratio is printed with a leading '-'.
- X *
- X * A zero denominator prints 0.00% instead of dividing by zero.  When
- X * num is large and den is too small for the scaled division, the whole
- X * ratio is computed first and saturated so that the result still fits
- X * in a 32-bit long.
- X */
- Xvoid prratio(num, den)
- Xlong int num, den;
- X{
- X    long q;     /* ratio in hundredths of a percent */
- X
- X    if (den == 0)
- X        q = 0;
- X    else if (num > 214748L) { /* 2147483647/10000 */
- X        long scaled = den / 10000L;
- X
- X        if (scaled != 0)
- X            q = num / scaled;
- X        else {
- X            /* |den| < 10000: dividing by "scaled" would divide by 0 */
- X            long whole = num / den;
- X
- X            if (whole > 214748L)
- X                whole = 214748L;
- X            else if (whole < -214748L)
- X                whole = -214748L;
- X            q = whole * 10000L;
- X        }
- X    }
- X    else
- X        q = 10000L * num / den; /* Long calculations, though */
- X    if (q < 0) {
- X        putc('-', stderr);
- X        q = -q;
- X    }
- X    fprintf(stderr, "%ld.%02ld%%", q / 100, q % 100);
- X}
- X
- X/*
- X * Print the identification string "rcs_ident" and the compiled-in
- X * BITS value on stderr, one per line.
- X */
- Xvoid version()
- X{
- X    FILE *err = stderr;
- X
- X    fprintf(err, "%s\n", rcs_ident);
- X    fprintf(err, "BITS = %d\n", BITS);
- X}
- X
- X/*
- X * Open the file "filename" for binary output on stdout, with possible
- X * check for overwrite.  A null or empty "filename" leaves stdout
- X * attached to whatever it already refers to.  If the file exists and
- X * "force" is not set, the user is asked (when "foreground" is set)
- X * whether to overwrite it; any answer not starting with 'y' refuses.
- X * If all goes well, return non-zero, else zero.
- X */
- Xint ofopen(filename)
- Xchar *filename;
- X{
- X    static char IOoutbuf[8192];
- X    struct stat statbuf;
- X
- X    if (filename && !*filename)
- X        filename = 0;
- X
- X    /*
- X     * Check for overwrite of existing file
- X     */
- X    if (filename && !force && stat(filename, &statbuf) == 0) {
- X        char response[2];
- X        response[0] = 'n';
- X        fprintf(stderr, "%s already exists;", filename);
- X        if (foreground) {
- X            int nread;
- X            char last;
- X
- X            fprintf(stderr, " do you wish to overwrite %s (y or n)? ", filename);
- X            fflush(stderr);
- X            nread = read(2, response, 2);
- X            if (nread <= 0) {
- X                /* No answer at all (error or end of input): refuse */
- X                if (nread < 0)
- X                    perror("stderr");
- X                response[0] = 'n';
- X            }
- X            else {
- X                /*
- X                 * Discard the rest of the answer line.  Only bytes that
- X                 * were really read are examined, and both an error and
- X                 * end of input stop the loop, so it cannot spin forever.
- X                 */
- X                last = response[nread - 1];
- X                while (last != '\n') {
- X                    nread = read(2, &last, 1);
- X                    if (nread < 0) { /* Ack! */
- X                        perror("stderr");
- X                        break;
- X                    }
- X                    if (nread == 0)
- X                        break;
- X                }
- X            }
- X        }
- X        if (response[0] != 'y') {
- X            fprintf(stderr, "\tnot overwritten\n");
- X            return (0);
- X        }
- X    }
- X
- X    okunlink = 1;
- X    /*
- X     * Open the output file.
- X     */
- X    if (filename && !freopen(filename, "wb", stdout)) {
- X        perror(filename);
- X        return (0);
- X    }
- X#ifdef O_BINARY
- X    setmode(fileno(stdout), O_BINARY);
- X#else
- X#ifdef __ZTC__
- X    /*
- X     * I'm sure there must be a better way in Zortech C to change the
- X     * mode of an already opened file, but I can't find it. It doesn't
- X     * have a "setmode" call it seems.
- X     */
- X    stdout->_flag &= ~_IOTRAN;
- X#endif
- X#endif
- X    setvbuf(stdout, IOoutbuf, _IOFBF, sizeof(IOoutbuf));
- X    return (1);
- X}
- X
- X/*
- X * Open the file "filename" for binary input on stdin.  A null
- X * "filename" leaves stdin attached to whatever it already refers to.
- X * In both cases stdin is switched to binary mode where the compiler
- X * offers a way to do so, and is given a private 8K buffer.
- X * Returns non-zero on success, zero (after perror) if the file cannot
- X * be opened.
- X */
- Xint ifopen(filename)
- Xchar *filename;
- X{
- X    static char IOinbuf[8192];
- X
- X    if (filename && !freopen(filename, "rb", stdin)) {
- X        perror(filename);
- X        return (0);
- X    }
- X#ifdef O_BINARY
- X    setmode(fileno(stdin), O_BINARY);
- X#else
- X#ifdef __ZTC__
- X    stdin->_flag &= ~_IOTRAN;
- X#endif
- X#endif
- X    setvbuf(stdin, IOinbuf, _IOFBF, sizeof(IOinbuf));
- X    return (1);
- X}
- END_OF_FILE
- if test 32700 -ne `wc -c <'compress.c'`; then
- echo shar: \"'compress.c'\" unpacked with wrong size!
- fi
- # end of 'compress.c'
- fi
- echo shar: End of shell archive.
- exit 0
-
-